# Rural Energy for Productive Use (SPI Enterprise Survey Analysis) -------------
# Authors: SP, MA, JU
# Year: 2019
# Revised Year: 2021

# LOAD NECESSARY PACKAGES ------------------------------------------------------

#install.packages(pacman)
library(pacman)

# tidyverse
p_load(dplyr, tidyr, readr, stringr, ggplot2, forcats, readxl, ggrepel, ggExtra,
       gghighlight)

# rspatial
p_load(sf)

# analysis
p_load(stargazer, fixest, cluster, factoextra)

# misc
p_load(here, cowplot, patchwork, readstata13, NbClust)

source(here("R", "custom_functions.R"))

# READ IN FULL DATASET ---------------------------------------------------------

source(here("R", "load_data.R"))

# Define covariates

num_covars <- c("q215_area", "q210_salaried_employees")

bin_covars <- c("q213_own", "q214_pucca")

vill_covars <- c("vill_hh", "vill_markets", "vill_grid_hrs", "vill_grid_all", "vill_bpl")

# RESEARCH QUESTIONS -----------------------------------------------------------

# 5 stat summary of villages
vill_summary <- vill_df %>% 
  transmute(Type = factor(vill_elec_type, 
                          levels = c("Grid (Franchise)", "Grid (Public)", "Grid (Public) + Minigrid")),
         "Households" = vill_hh, 
         "Share of BPL households" = vill_bpl, 
         "Marketplaces" = vill_markets, 
         "Grid supply hours" = vill_grid_hrs, 
         "All hamlets electrified" = vill_grid_all) %>%
  group_by(Type) %>% 
  mutate("Villages Sampled" = n()) %>% 
  pivot_longer(-Type, names_to = "Variable") %>% 
  group_by(Type, Variable) %>% 
  summarise_all(~round(mean(., na.rm = T),2)) %>% 
  pivot_wider(names_from = Type, values_from = value) %>% 
  mutate(Variable = factor(Variable, levels = c("Villages Sampled", "Households", "Share of BPL households", 
                                        "Marketplaces", "Grid supply hours", 
                                        "All hamlets electrified"))) %>% 
  arrange(Variable) %>% 
  mutate(Variable = as.character(Variable))

conncost <- ent_df %>% 
  filter(!is.na(vill_elec_type)) %>% 
  rename(Type = vill_elec_type) %>% 
  transmute(Type = Type,
            "Grid Connection Cost" = q303_grid_initial_cost,
            "Minigrid Connection Cost" = ifelse(Type == "Grid (Public) + Minigrid",q337_mgrid_initial_cost,NA)) %>% 
  group_by(Type) %>% 
  pivot_longer(-Type, names_to = "Variable") %>% 
  group_by(Type, Variable) %>% 
  summarise_all(~round(mean(., na.rm = T),2)) %>% 
  pivot_wider(names_from = Type, values_from = value)

vill_summary <- rbind(vill_summary, conncost)

stargazer(vill_summary, summary = F, rownames = F, float = F,
          digits = 2, out = here("Manuscript", "Tables", "vill_summary.tex"))

# Typical Village Grid Supply Duration
a <- vill_df %>% 
  group_by(vill_elec_type) %>% 
  ggplot(aes(vill_grid_hrs, fill = factor(vill_grid_all))) +
  geom_histogram(bins = 24, position = "dodge") +
  scale_x_continuous(breaks = seq(0,24,4)) +
  scale_fill_viridis_d(drop = F) +
  facet_wrap(~vill_elec_type) +
  labs(x = "Village average grid supply hours",
       y = "Villages",
       fill = "All hamlets grid connected") +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top")

b <- ent_df %>% 
  filter(!is.na(q303_grid_initial_cost), q303_grid_initial_cost != 0) %>% 
  group_by(vill_elec_type, vill_grid_all, q011_village_code) %>% 
  summarise(q303_grid_initial_cost = mean(q303_grid_initial_cost)) %>% 
  ggplot(aes(q303_grid_initial_cost, fill = factor(vill_grid_all))) +
  geom_histogram(bins = 60, position = "dodge") +
  coord_cartesian(xlim = c(0,10000)) +
  scale_fill_viridis_d(drop = F) +
  facet_wrap(~vill_elec_type) +
  labs(x = "Village average total connection costs paid by grid-connected MSMEs",
       y = "Villages",
       fill = "All hamlets grid connected") +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top")# Enterprise types surveyed

a/b + plot_layout(guides = "collect") & theme(legend.position = "top")

ggsave(here("Manuscript", "Figures", "1_supplyhours.png"),
       device = "png", width = 10, height = 6)

ent_df %>% 
  group_by(ent_cat, ent_type) %>% 
  dplyr::count() %>% 
  ungroup() %>% 
  mutate(prop = scales::percent(n/ sum(n), accuracy = 0.1)) %>% 
  rename(Category = ent_cat, Type = ent_type, Observations = n, Share = prop) %>% 
  stargazer(summary = F, rownames = F, out = here::here(
    "Manuscript", "Tables", "ent_types.tex"
  ))

# Five stat summary of regression data
ent_df %>% 
  select(elecsrc_gridany, elecsrc_ogany, monthly_consumption, vill_covars, num_covars, bin_covars) %>% 
  select("MSE grid connected" = elecsrc_gridany,
         "MSE uses OG backup" = elecsrc_ogany,
         "MSE monthly kWh" = monthly_consumption,
         "Village grid supply hours" = vill_grid_hrs,
         "Village households" = vill_hh, 
         "Village share of BPL households" = vill_bpl, 
         "Village marketplaces" = vill_markets, 
         "Village hamlets electrified" = vill_grid_all,
         "MSE floor area" = q215_area,
         "MSE salaried employees" = q210_salaried_employees,
         "MSE building pucca" = q214_pucca,
         "MSE building owned" = q213_own) %>% 
  psych::describe(skew = F, check = T) %>% 
  transmute(Mean = mean, SD = sd, Min = min, Max = max, Observations = n) %>% 
  stargazer(summary = F, float = F,
            digits = 2, out = here("Manuscript", "Tables", "lm_summary_all.tex"))

ent_df %>% 
  filter(elecsrc %in% c("Grid", "Grid + Off-grid")) %>% 
  select(elecsrc_gridany, elecsrc_ogany, monthly_consumption, vill_covars, num_covars, bin_covars) %>% 
  select("MSE grid connected" = elecsrc_gridany,
         "MSE uses OG backup" = elecsrc_ogany,
         "MSE monthly kWh" = monthly_consumption,
         "Village grid supply hours" = vill_grid_hrs,
         "Village households" = vill_hh, 
         "Village share of BPL households" = vill_bpl, 
         "Village marketplaces" = vill_markets, 
         "Village hamlets electrified" = vill_grid_all,
         "MSE floor area" = q215_area,
         "MSE salaried employees" = q210_salaried_employees,
         "MSE building pucca" = q214_pucca,
         "MSE building owned" = q213_own) %>% 
  psych::describe(skew = F, check = T) %>% 
  transmute(Mean = mean, SD = sd, Min = min, Max = max, Observations = n)%>% 
  stargazer(summary = F, float = F,
            digits = 2, out = here("Manuscript", "Tables", "lm_summary_gridog.tex"))

# Q1a: Determinants of grid electrification ------------------------------------

ent_df %>% dplyr::count(elecsrc) %>% mutate(prop = n / sum(n), sum = sum(n)) %>% 
  transmute("Electricity source group" = as.character(elecsrc),
            "Total firms" = n,
            Proportion = scales::percent(prop, accuracy = 1)) %>% 
  stargazer(summary = F, rownames = F, float = F,
            digits = 2, out = here("Manuscript", "Tables", "srcgrp.tex"))

ent_df %>% 
  mutate(none = as.numeric((
    select(.,q301_grid_use, q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
      rowSums(na.rm = T)) == 0)) %>% 
  summarise(across(c(q301_grid_use, q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use, none),
                   list(Mean = ~mean(.,na.rm = T), Sum = ~sum(.,na.rm = T)))) %>% 
  pivot_longer(everything(), names_pattern = "(.+)_(.+)", names_to = c("set", ".value")) %>% 
  transmute(
    "Sources used" = case_when(
      grepl(set, pattern = "q301_grid_use") ~ "Grid",
      grepl(set, pattern = "q335_mgrid_use") ~ "Minigrid",
      grepl(set, pattern = "q345_dg_use") ~ "Generator",
      grepl(set, pattern = "q317_shs_use") ~ "SolarHomeSystem",
      grepl(set, pattern = "q412_sl_use") ~ "SolarLantern",
      grepl(set, pattern = "q324_battery_use") ~ "Battery",
      TRUE ~ "None"),
    `Sources used` = factor(`Sources used`, levels = c("Grid", "Minigrid", "SolarHomeSystem" , "Generator", 
                                                                   "SolarLantern", "Battery", "None")),
    "Total firms" = Sum,
    "Proportion" = scales::percent(Mean, accuracy = 2)
    ) %>% 
  arrange(`Sources used`) %>% 
  mutate(`Sources used` = as.character(`Sources used`)) %>% 
  stargazer(summary = F, rownames = F, float = F,
            digits = 3, out = here("Manuscript", "Tables", "srcs.tex"))

ent_df %>% 
  filter(q301_grid_use == 0, vill_elec_type != ("Grid (Public) + Minigrid"),
         vill_grid_all == 1) %>% 
  select(matches("q301_b_no_grid_reason_all")) %>% 
  transmute(
    "Can't afford" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "1") |
                                  grepl(q301_b_no_grid_reason_all, pattern = "4")),
    "Don't need" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "2")),
    "Can't get connected" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "3")),
    "Service is not good" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "5") |
                                         grepl(q301_b_no_grid_reason_all, pattern = "6")),
    "No residential certificate" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "7")),
    "Don't know how" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "8")),
    "Other" = as.numeric(grepl(q301_b_no_grid_reason_all, pattern = "9"))
  ) %>% 
  summarise_all(~mean(.,na.rm = T)) %>% 
  gather(reason, perc) %>% 
  mutate(reason = forcats::fct_reorder(reason, perc)) %>% 
  ggplot(aes(fct_infreq(reason), perc)) +
  geom_col() +
  geom_label(aes(label = scales::percent(perc, accuracy = 1))) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  labs(x = NULL,
       y = "Proportion of firms, of those without a grid connection (N = 327)") +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1))

ggsave(here("Manuscript", "Figures", "3_nongrid_reasons.png"),
       device = "png", width = 8, height = 5)

lm_df <- ent_df %>% 
  mutate_at(
    c(num_covars, bin_covars, vill_covars), 
    .funs = c(~arm::rescale(.))) %>% 
  select(q301_grid_use, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat)) 

elec_lm_a <- feols(q301_grid_use ~ vill_grid_hrs,
              data = lm_df, fixef = c("q007_state"))

elec_lm_b <- feols(as.formula(paste0("q301_grid_use", " ~ ",
                                   paste(vill_covars, collapse = " + "))),
                       data = lm_df, fixef = c("q007_state"))

elec_lm_c <- feols(as.formula(paste0("q301_grid_use", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state", "ent_cat"))

# Model lists
foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(q301_grid_use = "Grid connected", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                timetocity = "Time to city",
                vill_grid_hrs = "Village typical grid hours",
                vill_grid_all = "All hamlets grid connected",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F,
       file = here("Manuscript", "Tables", "reg_grid.tex"))

# Q1b: Grid supply quality and propensity to acquire backups -------------------

boxplot <- ent_df %>%
  select(monthly_consumption, total_watts, elecsrc, num_covars, bin_covars) %>%
  pivot_longer(-elecsrc, names_to = "covar") %>% 
  mutate(covar = case_when(
    grepl(covar, pattern = "monthly_consumption") ~ "Monthly kWh",
    grepl(covar, pattern = "total_watts") ~ "Total appliance watts",
    grepl(covar, pattern = "q215_area") ~ "Floor area (square feet)",
    grepl(covar, pattern =  "q210_salaried_employees") ~ "Salaried employees",
    grepl(covar, pattern =  "q214_pucca") ~ "Building pucca (%)",
    grepl(covar, pattern =  "q213_own") ~ "Building owned (%)"),
    covar = factor(covar, levels = c("Grid connected (%)", 
                                     "Total appliance watts", "Monthly kWh",
                                     "Floor area (square feet)", "Salaried employees",
                                     "Building owned (%)", "Building pucca (%)"))
  ) 

a <- boxplot %>% 
  filter(covar == "Monthly kWh") %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_boxplot() +
  coord_cartesian(ylim = quantile(boxplot %>% 
                                    filter(covar == "Monthly kWh") %>% pull(value), c(0.05, 0.95))) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

b <- boxplot %>% 
  filter(covar == "Total appliance watts") %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_boxplot() +
  coord_cartesian(ylim = quantile(boxplot %>% 
                                    filter(covar == "Total appliance watts") %>% pull(value), c(0.05, 0.95))) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

c <- boxplot %>% 
  filter(covar == "Floor area (square feet)") %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_boxplot() +
  coord_cartesian(ylim = quantile(boxplot %>% 
                                    filter(covar == "Floor area (square feet)") %>% pull(value), c(0.05, 0.95))) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

d <- boxplot %>% 
  filter(covar == "Salaried employees") %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 10)) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

e <- boxplot %>% 
  filter(covar == "Building pucca (%)") %>% 
  group_by(elecsrc, covar) %>% 
  summarise(value  = mean(value, na.rm = T)) %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_col() +
  coord_cartesian(ylim = quantile(boxplot %>% 
                                    filter(covar == "Building pucca (%)") %>% pull(value), c(0.05, 0.95))) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

f <- boxplot %>% 
  filter(covar == "Building owned (%)") %>% 
  group_by(elecsrc, covar) %>% 
  summarise(value  = mean(value, na.rm = T)) %>% 
  ggplot(aes(elecsrc, value, group = elecsrc)) +
  geom_col() +
  coord_cartesian(ylim = c(0,1)) +
  facet_wrap(~covar, scales = "free", strip.position = "left") +
  theme_bw() + 
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = NULL, y = NULL)

wrap_plots(a,b,c,d,e,f, nrow = 2)

ggsave(here("Manuscript", "Figures", "2_elecsrcfirmcomparison.png"),
       device = "png", width = 10, height = 5)

lm_df <- ent_df %>% 
  filter(elecsrc_gridany == 1) %>% 
  mutate_at(
    c(num_covars, bin_covars, vill_covars), 
    .funs = c(~arm::rescale(.))) %>% 
  select(elecsrc_ogany, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat)) 

elec_lm_a <- feols(elecsrc_ogany ~ vill_grid_hrs,
                   data = lm_df, fixef = c("q007_state"))

elec_lm_b <- feols(as.formula(paste0("elecsrc_ogany", " ~ ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state"))

elec_lm_c <- feols(as.formula(paste0("elecsrc_ogany", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state", "ent_cat"))

# Model lists
foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(elecsrc_ogany = "OG backup", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                timetocity = "Time to city",
                vill_grid_hrs = "Village typical grid hours",
                vill_grid_all = "All hamlets grid connected",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F,
       file = here("Manuscript", "Tables", "reg_gridogbackup.tex"))

# Q1c: Grid supply quality and electricity utilization -------------------------

bin_covars <- c("q213_own", "q214_pucca", "elecsrc_ogany")

lm_df <- ent_df %>% 
  filter(elecsrc_gridany == 1) %>% 
  mutate(
    across(
      c(num_covars, bin_covars, vill_covars), 
    ~arm::rescale(.))
    ) %>% 
  select(monthly_consumption, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat))

elec_lm_a <- feols(log(monthly_consumption+1) ~ vill_grid_hrs,
                   data = lm_df, fixef = "q007_state")

elec_lm_b <- feols(as.formula(paste0("log(monthly_consumption+1)", " ~ ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = "q007_state")

elec_lm_c <- feols(as.formula(paste0("log(monthly_consumption+1)", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     "elecsrc_ogany",
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state","ent_cat"))

# Model lists
foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(`log(monthly_consumption+1)` = "Log Monthly kWh", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                elecsrc_ogany = "Uses OG Backup",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                vill_grid_hrs = "Village typical grid hours",
                vill_grid_all = "All hamlets grid connected",
                vill_mgpresent = "Minigrid present",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F, 
       file = here("Manuscript", "Tables", "reg_cons_grid.tex"))

# Test linearity assumption of vill_grid_hrs and q301_grid_use
test <- lm(data = lm_df, log(monthly_consumption + 1) ~ q215_area + q210_salaried_employees + 
             q213_own + q214_pucca + elecsrc_ogany + elecsrc_ogany + vill_hh + 
             vill_markets + vill_grid_hrs + vill_grid_all + vill_bpl + q007_state + ent_cat)
par(mfrow = c(2, 2))
plot(test)
par(mfrow = c(1, 1))

lm_df <- ent_df %>% 
  filter(elecsrc_gridany == 1) %>% 
  arrange(vill_grid_hrs) %>%  
  mutate(vill_grid_hrs_bin = cut(vill_grid_hrs, breaks = seq(0, 24, 3), na.rm = TRUE,
                             include.lowest = TRUE, labels = paste0("hrs_", seq(0, 21, 3))),
         vals = 1
  ) %>% 
  pivot_wider(names_from = vill_grid_hrs_bin, values_from = vals, values_fill = list(vals = 0))

hrs_bins <- c("hrs_0", "hrs_3", "hrs_6", "hrs_9", "hrs_12", "hrs_18", "hrs_21")

lm_df <- lm_df %>% 
  mutate(
    across(
      c(num_covars, bin_covars, vill_covars, hrs_bins), 
      ~arm::rescale(.))
  ) %>% 
  select(monthly_consumption, q007_state, q009_district,
         c(hrs_bins, num_covars, bin_covars, vill_covars, ent_cat))

elec_lm_test <- feols(as.formula(paste0("log(monthly_consumption+1)", " ~ ",
                                     paste(hrs_bins, collapse = " + "),
                                     " + ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     "elecsrc_ogany",
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state","ent_cat"))

fixest::coefplot(elec_lm_test, keep = hrs_bins)

# Q2a Understanding productive electricity utilisation -------------------------

a <- ent_df %>% 
  filter(elecsrc != "None", srvs != "None") %>% 
  mutate(elecsrc = factor(elecsrc, levels = c("Off-grid", "Grid", "Grid + Off-grid"))) %>% 
  dplyr::count(elecsrc, srvs) %>%
  group_by(elecsrc) %>% 
  mutate(prop = n / sum(n)) %>% 
  ggplot(aes(fct_rev(elecsrc), n, label = scales::percent(prop, accuracy = 1), fill = fct_rev(srvs))) +
  geom_col() +
  geom_text(position = position_stack(vjust = 0.5)) +
  coord_flip() +
  scale_fill_viridis_d(direction = -1, begin = 0.4) +
  theme_bw() +
  guides(fill = guide_legend(reverse = T)) +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(y = "Number of firms",
       x = NULL,
       fill = NULL)

b <- ent_df %>%
  filter(elecsrc != "None", srvs != "None") %>% 
  mutate(elecsrc = factor(elecsrc, levels = c("Off-grid", "Grid", "Grid + Off-grid"))) %>% 
  pivot_longer(cols = matches("_watts"), names_to = "Service", values_to = "Watts") %>% 
  select(elecsrc, Service, Watts) %>% 
  mutate(Service = case_when(
    grepl(Service, pattern = "frdg") ~ "Fridge",
    grepl(Service, pattern = "ict") ~ "ICTs",
    grepl(Service, pattern = "light") ~ "Light",
    grepl(Service, pattern = "mech") ~ "Mechanical",
    grepl(Service, pattern = "spcl") ~ "Fans",
    grepl(Service, pattern = "total") ~ "Total"),
    Service = factor(Service, levels = c("Light", "Fans", "ICTs", "Fridge", "Mechanical", "Total"))
  ) %>% 
  ggplot(aes(Watts, colour = factor(elecsrc))) +
  stat_ecdf(geom = "step") +
  coord_cartesian(xlim = c(0,500)) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(x = "Total Appliance Watts",
       y = "Cumulative Proportion of Firms",
       colour = "Source") +
  facet_wrap(~Service)

c <- ent_df %>%
  filter(elecsrc != "None", srvs != "None") %>% 
  mutate(elecsrc = factor(elecsrc, levels = c("Off-grid", "Grid", "Grid + Off-grid"))) %>% 
  pivot_longer(cols = c(matches("_kwh"), monthly_consumption), names_to = "Service", values_to = "Watts") %>% 
  filter(!grepl(Service, pattern = "dummy")) %>% 
  select(elecsrc, Service, Watts) %>% 
  mutate(Service = case_when(
    grepl(Service, pattern = "frdg") ~ "Fridge",
    grepl(Service, pattern = "ict") ~ "ICTs",
    grepl(Service, pattern = "light") ~ "Light",
    grepl(Service, pattern = "mech") ~ "Mechanical",
    grepl(Service, pattern = "spcl") ~ "Fans",
    grepl(Service, pattern = "monthly") ~ "Total"),
    Service = factor(Service, levels = c("Light", "Fans", "ICTs", "Fridge", "Mechanical", "Total"))
  ) %>% 
  ggplot(aes(Watts, colour = factor(elecsrc))) +
  stat_ecdf(geom = "step") +
  coord_cartesian(xlim = c(0,100)) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(x = "Monthly kWh",
       y = "Cumulative Proportion of Firms",
       colour = "Source") +
  facet_wrap(~Service)

a / b / c + plot_layout(heights = c(0.5,1,1))

ggsave(here("Manuscript", "Figures", "3_enttypeempkwh.png"),
       device = "png", width = 7, height = 12)

a <- ent_df %>% 
  filter(elecsrc != "None", srvs != "None", !ent_cat %in% c("Misc", "Agri")) %>% 
  dplyr::count(ent_cat, srvs) %>%
  group_by(ent_cat) %>% 
  mutate(prop = n / sum(n)) %>% 
  ggplot(aes(fct_rev(ent_cat), n, label = scales::percent(prop, accuracy = 1), fill = fct_rev(srvs))) +
  geom_col() +
  geom_text_repel(position = position_stack(vjust = 0.5), direction = "x") +
  coord_flip() +
  scale_fill_viridis_d(direction = -1, begin = 0.4) +
  theme_bw() +
  guides(fill = guide_legend(reverse = T)) +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(y = "Number of firms",
       x = NULL,
       fill = "Services")

b <- ent_df %>%
  filter(elecsrc != "None", srvs != "None", !ent_cat %in% c("Misc", "Agri")) %>% 
  select(ent_cat, total_watts) %>%
  ggplot(aes(total_watts, colour = factor(ent_cat))) +
  stat_ecdf(geom = "step") +
  coord_cartesian(xlim = c(0,500)) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(x = "Total Appliance Watts",
       y = "Cumulative Proportion of Firms",
       colour = "Firm category")

c <- ent_df %>%
  filter(elecsrc != "None", srvs != "None", !ent_cat %in% c("Misc", "Agri")) %>% 
  select(ent_cat, monthly_consumption) %>%
  ggplot(aes(monthly_consumption, colour = factor(ent_cat))) +
  stat_ecdf(geom = "step") +
  coord_cartesian(xlim = c(0,100)) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white", color = "black"),
        plot.caption = element_text(hjust = 1),
        legend.position = "top") +
  labs(x = "Monthly kWh",
       y = "Cumulative Proportion of Firms",
       colour = "Firm category")

wrap_plots(a, wrap_plots(b,c), nrow = 2) + plot_layout(heights = c(0.5,1,1), guides = "collect") & 
  theme(legend.position = "bottom")

ggsave(here("Manuscript", "Figures", "3b_entcatempkwh.png"),
       device = "png", width = 10, height = 7)

# Robustness checks - analyses in fully electrified grid villages --------------

num_covars <- c("q215_area", "q210_salaried_employees")

bin_covars <- c("q213_own", "q214_pucca")

vill_covars <- c("vill_hh", "vill_markets", "vill_grid_hrs", "vill_bpl")

# Grid connection likelihoods
lm_df <- ent_df %>% 
  filter(vill_grid_all == 1, vill_elec_type == "Grid (Public)") %>% 
  mutate_at(
    c(num_covars, bin_covars, vill_covars), 
    .funs = c(~arm::rescale(.))) %>% 
  select(q301_grid_use, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat))

elec_lm_a <- feols(q301_grid_use ~ vill_grid_hrs,
                   data = lm_df, fixef = c("q007_state"))

elec_lm_b <- feols(as.formula(paste0("q301_grid_use", " ~ ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state"))

elec_lm_c <- feols(as.formula(paste0("q301_grid_use", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state", "ent_cat"))

foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(q301_grid_use = "Grid connected", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                timetocity = "Time to city",
                vill_grid_hrs = "Village typical grid hours",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F,
       file = here("Manuscript", "Tables", "robustness_reg_grid.tex"))

# Propensity to acquire backups

lm_df <- ent_df %>% 
  filter(elecsrc_gridany == 1, vill_grid_all == 1, vill_elec_type == "Grid (Public)") %>% 
  mutate_at(
    c(num_covars, bin_covars, vill_covars), 
    .funs = c(~arm::rescale(.))) %>% 
  select(elecsrc_ogany, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat)) 

elec_lm_a <- feols(elecsrc_ogany ~ vill_grid_hrs,
                   data = lm_df, fixef = c("q007_state"))

elec_lm_b <- feols(as.formula(paste0("elecsrc_ogany", " ~ ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state"))

elec_lm_c <- feols(as.formula(paste0("elecsrc_ogany", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state", "ent_cat"))

# Model lists
foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(elecsrc_ogany = "OG backup", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                timetocity = "Time to city",
                vill_grid_hrs = "Village typical grid hours",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F,
       file = here("Manuscript", "Tables", "robustness_reg_gridogbackup.tex"))

# Electricity consumption

bin_covars <- c("q213_own", "q214_pucca", "elecsrc_ogany")

lm_df <- ent_df %>% 
  filter(elecsrc_gridany == 1, vill_grid_all == 1, vill_elec_type == "Grid (Public)") %>% 
  mutate(
    across(
      c(num_covars, bin_covars, vill_covars), 
      ~arm::rescale(.))
  ) %>% 
  select(monthly_consumption, q007_state, q009_district,
         c(num_covars, bin_covars, vill_covars, ent_cat))

elec_lm_a <- feols(log(monthly_consumption+1) ~ vill_grid_hrs,
                   data = lm_df, fixef = "q007_state")

elec_lm_b <- feols(as.formula(paste0("log(monthly_consumption+1)", " ~ ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = "q007_state")

elec_lm_c <- feols(as.formula(paste0("log(monthly_consumption+1)", " ~ ",
                                     paste(num_covars, collapse = " + "),
                                     " + ",
                                     paste(bin_covars, collapse = " + "),
                                     " + ",
                                     "elecsrc_ogany",
                                     " + ",
                                     paste(vill_covars, collapse = " + "))),
                   data = lm_df, fixef = c("q007_state","ent_cat"))

# Model lists
foels_list <- list(elec_lm_a, elec_lm_b, elec_lm_c)

etable(foels_list, 
       se = "cluster",
       cluster = c("q009_district"),
       digits = 3,
       dict = c(`log(monthly_consumption+1)` = "Log Monthly kWh", 
                q215_area = "Floor area",
                q210_salaried_employees = "Salaried employees",
                q213_own = "Building owned",
                q214_pucca = "Building pucca",
                elecsrc_ogany = "Uses OG Backup",
                vill_markets = "Village marketplaces",
                vill_bpl = "Village share BPL households",
                vill_hh = "Village households",
                vill_grid_hrs = "Village typical grid hours",
                vill_grid_all = "All hamlets grid connected",
                vill_mgpresent = "Minigrid present",
                q007_state = "State",
                q009_district = "District",
                ent_cat = "Enterprise Category"),
       fixef_sizes = T,
       float = F, 
       file = here("Manuscript", "Tables", "robustness_reg_cons_grid.tex"))
